//==============================================================================
// Project:		Wealth Transfers and their economic effects
// File name:	IWP 4
// Objective: 	Part 4 of IWP work - imputing parents' wealth back to 2002 and calculating parents' percentile ranks
//
//==============================================================================

clear all

************************
*** Data import ********
************************

cd "XXXX" //set working directory here

*Load the base panel. -clear- is the documented option of -use- for replacing the data in memory
use combined_filtered_restricted.dta, clear

*Variables from IWP 2.do, merged such that they are matched to the person's partner.
*keep(master match) discards observations found only in the using file; nogenerate suppresses _merge
merge m:1 hhpxid wave using IWP_data_2.dta, keep(master match) nogenerate

*Individual wealth variables, matched on the person identifier and wave
merge m:1 xwaveid wave using individual_weight_vars_restricted.dta, keep(master match) nogenerate

drop if hgint == 0 //drop non-responders

*Declare the panel structure (person by wave) used by the xt commands below
xtset xwaveid wave

****************************************************
*** Create gender-free person-specific variables ***
****************************************************

*Note: variables created here (demographic characteristics of responding persons) are matched to those created about the parents of the child generation in IWP 1.do

*Wealth: individual net worth, then couple net worth with a missing partner value treated as zero
gen net_worth = pwassei - pwdebti //assets less debts
replace net_worth_partner = 0 if net_worth_partner == .
gen net_worth_couple = net_worth + net_worth_partner //equals net_worth when there is no partner value
gen log_net_worth_couple = log(net_worth_couple) //log() returns missing for non-positive net worth

*Polynomial terms in year of birth
gen yob_squ = yob^2
gen yob_cub = yob^3

*University indicator: 1 = attended, 0 = did not attend, missing for any other edhigh1 code
gen university = 1 if inlist(edhigh1, 1, 2, 3)
replace university = 0 if inlist(edhigh1, 4, 5, 8, 9)

*Highest level of schooling, collapsed to match the categories from IWP 1:
*1 is grade 12, 2 is grade 11, 3 is highschool but not higher than grade 10, 4 is primary school and special needs
gen school = edhists
replace school = 3 if inrange(edhists, 3, 6)
replace school = 4 if inrange(edhists, 7, 9)
replace school = . if edhists < 0 //negative codes become missing

*Country of birth (categories match IWP 1): 1 is Australia, 2 is main English speaking, 3 is other
gen birth_country = anbcob if anbcob != -4 //code -4 becomes missing
tabulate birth_country, missing

*****************************************************
*** Create equivalent variables about the partner ***
*****************************************************
*Note: these variables draw from IWP 2.do

*Partner year of birth: person-level maximum across waves, plus squared and cubed terms
egen yob_partner_max = max(yob_partner), by(xwaveid)
gen yob_partner_max_squ = yob_partner_max^2
gen yob_partner_max_cub = yob_partner_max^3

*Remaining partner characteristics (university attendance, schooling, country of birth, gender),
*each taken as the person-level maximum across waves
foreach v in university school birth_country sex {
	egen `v'_partner_max = max(`v'_partner), by(xwaveid)
}

*****************************************************************
*** Create separate male and female person-specific variables ***
*****************************************************************

*Male variables
*Note: If male same sex partners, take the male variable from the longer lived person. If female same sex partners, leave blank.

foreach v in school university birth_country yob {
	gen `v'_male = `v' if sex == 1 //person is male: use own value
	replace `v'_male = `v'_partner_max if sex == 2 //person is female: use the partner's value
	replace `v'_male = . if sex == 2 & sex_partner_max == 2 //both partners female: no male value
}

gen yob_male_squ = yob_male^2 //male year of birth squared
gen yob_male_cub = yob_male^3 //male year of birth cubed (previously ^2, which duplicated the squared term)

*Female variables
*Note: If female same sex partners, take the female variable from the longer lived person. If male same sex partners, leave blank.

foreach v in school university birth_country yob {
	gen `v'_female = `v' if sex == 2 //person is female: use own value
	replace `v'_female = `v'_partner_max if sex == 1 //person is male: use the partner's value
	replace `v'_female = . if sex == 1 & sex_partner_max == 1 //both partners male: no female value
}

gen yob_female_squ = yob_female^2 //female year of birth squared
gen yob_female_cub = yob_female^3 //female year of birth cubed (previously ^2, which duplicated the squared term)

*************************************
*** Create couple-level variables ***
*************************************

*Polynomial terms in wave
gen wave_squ = wave^2
gen wave_cub = wave^3

*Flag for having a child in the child cohort.
*A child's year of birth is derived as (wave + 2000) minus the recorded age; the flag is set when any
*ncage1-ncage14 or rcage1-rcage13 child has a derived year of birth between 1944 and 1954 inclusive.
*NOTE(review): the flag is named 63_73 while the birth-year window is 1944-1954 - presumably the name refers to the children's ages rather than birth years; confirm
gen child_63_73 = 0
foreach stem in ncage rcage {
	local n_slots = cond("`stem'" == "ncage", 14, 13)
	forvalues i = 1/`n_slots' {
		gen `stem'`i'_yob = wave + 2000 - `stem'`i'
		replace child_63_73 = 1 if inrange(`stem'`i'_yob, 1944, 1954)
	}
}

*Number of children
gen children = tcnr

**********************************
*** Drop unwanted observations ***
**********************************
*Similar to IWP 3, but need to keep observations of people who have a partner who will die but hasn't yet died.

*Partner's year of death, spread across all of the person's observations
egen yodeath_partner_max = max(yodeath_partner), by(xwaveid)

*Flag observations where the partner's recorded year of death (excluding the -1 code and missing) precedes wave + 2000
*NOTE(review): the strict < only flags deaths in years before wave + 2000; confirm whether a death during that year should also count
gen partner_has_died = 1 if yodeath_partner_max < wave + 2000 & yodeath_partner_max != . & yodeath_partner_max != -1

*Keep every observation of a person who is flagged in at least one wave, so that waves before the partner's death are retained
egen max_partner_has_died = max(partner_has_died), by(xwaveid)
keep if max_partner_has_died == 1

*Keep only people with a child in the child cohort
keep if child_63_73 != 0

*Keep only the waves for which wealth is recorded
keep if inlist(wave, 2, 6, 10, 14, 18)

*****************
*** Fit model ***
*****************

*All models below are random-effects panel regressions of log couple net worth, estimated on observations with wlrb__r > 0.
*The order of commands matters: xttest0 and xtoverid each use the estimation that immediately precedes them.

*Model 1 (model 3 from IWP 3) - only wave and birth_country_male are statistically significant - try removing all others
xtreg log_net_worth_couple yob_male yob_male_squ wave i.school_male i.school_female university_male university_female i.birth_country_male if wlrb__r > 0, re

*Model 2 - all are statistically significant, check to see if wave and birth_country_male should be interacted
xtreg log_net_worth_couple wave i.birth_country_male if wlrb__r > 0, re

*Model 3 - interaction not statistically significant, so don't include it - model 2 is preferred
xtreg log_net_worth_couple wave i.birth_country_male c.wave#i.birth_country_male if wlrb__r > 0, re

*Test for individual-specific effect
*Model 2 is re-estimated first so that xttest0 is applied to it rather than to model 3
xtreg log_net_worth_couple wave i.birth_country_male if wlrb__r > 0, re
xttest0 //Reject H0 (p = 0.0000), hence RE is preferred over pooled OLS

*Robust Hausman test for whether RE is biased 
*NOTE(review): xtoverid is not an official Stata command - presumably installed from SSC; confirm it is available before running
tab birth_country_male, generate(BCM) //create indicator variables for birth_country_male because xtoverid doesn't accept the i. notation
xtreg log_net_worth_couple wave BCM2 BCM3 if wlrb__r > 0, re
xtoverid //Do not reject H0 (p = 0.3313), hence RE is preferred over FE

*Re-estimate model 2 last so that its coefficients are the active estimation results; the imputation section below reads _b[wave]
xtreg log_net_worth_couple wave i.birth_country_male if wlrb__r > 0, re

************************************************************************
*** Impute parent_WAD and parent_WAD_expected to 2002 using model 2  ***
************************************************************************
*Note: model 2 transforms to parent_WAD = exp(_b[wave] * wave) * constant, where the constant incorporates the other covariates and the error term. The constant can be calculated directly; hence only _b[wave] is needed.

*Capture the wave coefficient from the most recent estimation before changing datasets.
*Holding it in a scalar makes the dependence on the stored estimates explicit and stops here if no estimates are available.
scalar b_wave = _b[wave]

*Load the data to be imputed. -clear- is the documented option of -use- and replaces the data in memory,
*so a separate -clear- beforehand is not needed
use IWP_data_4.dta, clear

*constant = parent_WAD / exp(b * wave), i.e. the model equation re-arranged
gen constant = parent_WAD/exp(scalar(b_wave) * wave)

codebook constant //inspect the distribution of the implied constant

gen parent_WAD_imputed_2 = constant * exp(scalar(b_wave) * 2) //parent_WAD imputed to wave 2

*Same re-arrangement and imputation applied to parent_WAD_expected
gen constant_2 = parent_WAD_expected/exp(scalar(b_wave) * wave)

gen parent_WAD_expected_imp_2 = constant_2 * exp(scalar(b_wave) * 2) //parent_WAD_expected imputed to wave 2

*****************************************
*** Additional cleaning and renaming  ***
*****************************************

*Every remaining value now refers to wave 2
replace wave = 2

*Discard observations without an imputed value
drop if parent_WAD_imputed_2 == .

*Retain the identifier, the two imputed wealth measures, sample flags, wave, year-of-birth variables and the weight
keep xwaveid wave yob* sample* wlra_s parent_WAD_imputed_2 parent_WAD_expected_imp_2

*Give the imputed measures their final names
rename (parent_WAD_imputed_2 parent_WAD_expected_imp_2) (net_worth_couple net_worth_couple_expected)


*****************************************
*** Compute adult percentile rankings  **
*****************************************

*For each sample definition, compute weighted percentile ranks (100 quantiles) of couple net worth and of
*expected couple net worth, restricted to observations flagged as belonging to that sample
foreach s in 2pc 10pc 20pc {
	local insample "if sample_`s'_zero == 1"
	xtile net_worth_couple_ptile_`s' = net_worth_couple [pweight = wlra_s] `insample', nquantiles(100)
	xtile net_worth_couple_exp_ptile_`s' = net_worth_couple_expected [pweight = wlra_s] `insample', nquantiles(100)
}

*Write out the result
save IWP_data_5.dta, replace













